/*******************************************************************************

This code file uses a calibrated model of moves to estimate the effects of 
421a on children of inclusionary residents.

*******************************************************************************/

*** Manage settings

	* Run the project-wide settings file.
	* NOTE(review): presumably this defines the data global (and possibly the
	* convert_rank_dollar command) used further down -- confirm.
	run "~/Dropbox (MIT)/Research/NYC421a/code/modules/settings.do"
	
* Bergman et al Section V.B
* Used below as the fraction of the destination-minus-origin gap in the
* outcome rank that is credited to a move.

	local causal_fraction = 0.62
	
	* Options for calibration of distance-cost parameter 
	* The parameter enters the move weights as exp(-dist / tuning) with dist in km,
	* so a larger value puts more weight on distant NTAs.
	local tuning = 7 // average move distance of 10 km
	* local tuning = 2.95 // average move distance of 5 km (3.1 mi -- close moves scenario)

*** Raw data cleaning [commented out b/c I do not save the raw file]

/*
	
	* Load OI data
	use "tract_outcomes_early_dta.dta", clear

	* Keep NYC
	keep if state == 36
	keep if county == 5 | county == 81 | county == 85 | county == 61 | county == 47
	
	* Keep only relevant variable: individual rank-rank for stayers, pooled across race and gender
	keep state county tract kir_stycz_pooled_pooled*

	save "$data/raw/oi_tract_outcomes.dta", replace
	
*/

************


	*** Tenant-Based Section 8
	*
	* Reads tract-level assisted-unit counts, keeps the five NYC counties,
	* spreads each tract's count across its census blocks in proportion to
	* the blocks' poor-renter units, and stores the block-level result in
	* the tempfile assisted_tbsection8.

		import delimited "$data/raw/OtherHousingAssistance/Section8/hudPicture2015_360148.csv", encoding(ISO-8859-1) clear
		
		* Characters 1-5 of the geography code are state + county FIPS;
		* recode the five NYC counties to borough numbers 1-5
		gen borough = substr(code,1,5)
		destring borough, replace force
		
		replace borough = 1 if borough == 36061
		replace borough = 2 if borough == 36005
		replace borough = 3 if borough == 36047
		replace borough = 4 if borough == 36081 
		replace borough = 5 if borough == 36085
		
		* Characters 6-11 are the six-digit tract code
		gen ct2010 = substr(code,6,6)
		destring ct2010, replace force
		
		* Drops every other county, plus codes that failed destring
		* (missing compares as larger than 5)
		drop if borough > 5
		
		* NOTE(review): "reported" is presumably the count of reported assisted
		* households/units in the HUD file -- confirm against the data dictionary
		rename reported assisted_unit_count
		
		collapse (sum) assisted_unit_count, by(borough ct2010)
		
		* One tract row fans out to all of its census blocks; keep(2 3) retains
		* every block in the census file, with or without assisted units
		merge 1:m borough ct2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen
		
		* Allocate the tract total to blocks by each block's share of the
		* tract's poor-renter units.
		* NOTE(review): when a tract's poor-renter total is 0 the share is
		* missing, so that tract's units turn missing and count as zero in the
		* sum below -- confirm this is acceptable.
		bys borough ct2010: egen sum_units = total(ct_occ_renter_units*sh_poor)
		gen sh_units = (ct_occ_renter_units*sh_poor) / sum_units
		replace assisted_unit_count = assisted_unit_count * sh_units
		drop sum_units sh_units
		
		gcollapse (sum) assisted_unit_count ct_occ_renter_units, by(borough ct2010 cb2010)
		
		rename assisted_unit_count assisted_unit_count_tbsection8
		
		tempfile assisted_tbsection8
		save `assisted_tbsection8', replace

	*** LIHTC
	*
	* Builds block-level assisted-unit counts from the non-421a building
	* file, adds the tenant-based Section 8 counts, and stores the total in
	* the tempfile assisted_tbsection8_lihtc.

		* NOTE(review): presumably bldg == 2 flags LIHTC buildings -- confirm
		* against the code that builds non_421a_units.dta
		use "$data/clean/non_421a_units.dta", clear
		keep if bldg == 2
				
		* keep(2 3) retains every row of the census file, including blocks
		* that contain no such building
		merge m:1 borough ct2010 cb2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen

		* The m:1 merge repeats a block's renter-unit count on every building
		* row in that block, so take it once per block rather than summing it
		* (summing would multiply it by the number of buildings)
		gcollapse (sum) assisted_unit_count (first) ct_occ_renter_units, by(borough ct2010 cb2010)
		
		* Add the tenant-based Section 8 units allocated to the same block
		merge 1:1 borough ct2010 cb2010 using `assisted_tbsection8', nogen
		replace assisted_unit_count = assisted_unit_count + assisted_unit_count_tbsection8
		drop assisted_unit_count_tbsection8
		
		tempfile assisted_tbsection8_lihtc
		save `assisted_tbsection8_lihtc', replace


	*** Compute distribution of poor renters by NTA
	*
	* Result: one row per NTA holding that NTA's share of all poor-renter
	* units in the file, saved to the tempfile poorrenters_nta.
	
		use "$data/clean/census_area_characteristics.dta", clear
		
		* Poor-renter units = occupied renter units x poverty share
		gen poor_renter_units = ct_occ_renter_units * sh_poor
	
		* Attach the NTA each tract belongs to
		merge m:1 borough ct2010 using "$data/clean/ct2010_to_nta.dta", nogen
		
		* Total by NTA, then divide by the total over all NTAs
		collapse (sum) poor_renter_units, by(nta_code nta)
		egen all_poor_renter_units = total(poor_renter_units)
		gen sh_poor_renters_in_nta = poor_renter_units / all_poor_renter_units
		
		keep nta_code sh_poor_renters_in_nta
		
		tempfile poorrenters_nta
		save `poorrenters_nta', replace

************

*** Collapse to NTAs
*
* Aggregates the tract-level p25 outcome rank to NTAs as a count-weighted
* mean, and carries its standard error along.

	use "$data/raw/oi_tract_outcomes.dta", clear

	* Keep the five NYC counties and translate county FIPS to borough codes
	
	keep if inlist(county, 61, 5, 47, 81, 85)
	gen borough = cond(county == 61, 1, cond(county == 5, 2, cond(county == 47, 3, cond(county == 81, 4, 5))))

	* The crosswalk keys on ct2010
	rename tract ct2010
	
	* Attach NTA codes
	merge m:1 borough ct2010 using "$data/clean/ct2010_to_nta.dta", nogen
	
	* Variance of a weighted mean: sum over tracts of (weight x se)^2, where
	* a tract's weight is its count divided by the NTA's total count
	bys nta_code nta: gegen nta_total_n = sum(kir_stycz_pooled_pooled_n)
	gen tract_wt = kir_stycz_pooled_pooled_n / nta_total_n 
	gen p25_var_part = (tract_wt * kir_stycz_pooled_pooled_p25_se)^2
	
	* rawsum adds the variance pieces without applying the analytic weights
	collapse (mean) kir_stycz_pooled_pooled_p25 (rawsum) p25_var_part [aw=kir_stycz_pooled_pooled_n], by(nta_code nta)
	
	gen kir_stycz_pooled_pooled_p25_se = sqrt(p25_var_part)
	drop p25_var_part
	
*** Find value-added
*
* For each NTA i, treated as the destination: weight every NTA as a possible
* origin by (its share of poor renters) x exp(-distance to i / tuning),
* move each origin's outcome rank toward i's rank by the causal fraction,
* convert old and new ranks to dollars, and average the log dollar change
* over origins. One row per destination is accumulated in the tempfile master.

	* Merge in poor-renter distribution
	merge 1:1 nta_code using `poorrenters_nta', nogen

	* Merge in lat-long coordinates
	merge 1:1 nta_code using "$data/GIS_boundaries/neighborhood_boundaries/df.dta", nogen
	
	* Seed the accumulator with one blank row so that every iteration can
	* append to it; the blank row is dropped before the final save
	
	preserve
	clear
	tempfile master
	set obs 1
	gen nta_code = ""
	gen pct_diff = .
	save `master', replace
	restore
		
	local n = _N
	
	forvalues i = 1/`n' {
			
		quietly {
	
		preserve
		
		* Take the destination's outcome, coordinates and label from row i in
		* a single pass, before any command can reorder the observations.
		* (Reading the label after a merge, as an earlier version did, can
		* pick up a different row, because merge may re-sort the data.)
		gen double kir_dest = kir_stycz_pooled_pooled_p25[`i']
		gen double x_dest = x_centroid[`i']
		gen double y_dest = y_centroid[`i']
		replace nta_code = nta_code[`i']
		
		* 1 degree latitude = 111 km
		* NOTE(review): the same factor is applied to the x difference. If the
		* centroids are decimal degrees, one degree of longitude at NYC's
		* latitude is roughly 84 km, so east-west distances would be
		* overstated. The tuning values above were calibrated with this
		* formula, so changing it requires recalibrating them.

		gen dist = 111 * sqrt((x_centroid-x_dest)^2 + (y_centroid-y_dest)^2)
				
		* Origin weights: poor-renter share discounted by distance, normalised
		* to sum to one
		gen exp_move = exp(-dist / `tuning')
		egen denominator = total(sh_poor_renters_in_nta*exp_move)
		gen pr_move = sh_poor_renters_in_nta * exp_move / denominator
		drop exp_move denominator
		
		* Diagnostics: weighted mean move distance (km) and weighted share of
		* moves within 1 km and within 10 km
		summ dist [aw=pr_move]
		local dist = r(mean)
		
		gen lt1 = dist <= 1
		summ lt1 [aw=pr_move]
		local lt1 = r(mean)
		
		gen lt10 = dist <= 10
		summ lt10 [aw=pr_move]
		local lt10 = r(mean)
		
		* Skip destinations that have no outcome estimate
		if !missing(kir_stycz_pooled_pooled_p25[`i']) {
		
			* Outcome after the move: origin rank plus the causal fraction of
			* the destination-origin gap
			gen kir_new = kir_stycz_pooled_pooled_p25 + `causal_fraction'*(kir_dest - kir_stycz_pooled_pooled_p25)
			
			* convert_rank_dollar is defined outside this file; it is used
			* here for the dollar_amount variable it leaves behind
			convert_rank_dollar kir_new, kir variable multiply100
			rename dollar_amount dollar_new
			
			convert_rank_dollar kir_stycz_pooled_pooled_p25, kir variable multiply100
			rename dollar_amount dollar_old
			
			gen pct_diff = log(dollar_new/dollar_old)
			
			* One row for destination i: move-weighted means over origins
			collapse (first) nta_code (mean) pct_diff dist [aw=pr_move]
			
			append using `master'
			save `master', replace
		
		}
		
		restore
		
		}
		
		* Confirming  move distances
		di "`i': average move dist = `dist' km"
		* di "`i': % move within 1 km = `lt1', % move within 10 km = `lt10'"
	
	}

*** Save

	* Load the accumulated per-NTA results and discard rows with no NTA code,
	* which includes the blank row used to seed the accumulator
	use `master', clear
	keep if !missing(nta_code)
	
	save "$data/raw/oi_pred_effect_by_nta.dta", replace
	
